require(tidyverse)
require(lmtest)
require(data.table)
require(plm)
require(lubridate)
require(ivreg)
require(DescTools)
require(rsq)
require(miceadds)
require(jtools)
require(stats)
require(haven)
require(sandwich)
require(fastDummies)
require(sqldf)
require(scales)
require(stringr)
require(MatchIt)
require(modelsummary)
options(modelsummary_format_numeric_latex = "plain")
require(readxl)
library(purrr)
library(broom)
library(clubSandwich)

############################# Initial Data Preparation #############################
# Set Working Directory
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

#### Load IBES Guidance - Detail file (accessed via WRDS)
guidance <- read.csv("ibes_guidance.csv")

### Initial Filters ###: Keep annual EPS forecasts in USD
# Annual Forecasts
guidance <- subset(guidance, pdicity == "ANN")
# EPS Forecasts
guidance <- subset(guidance, measure == "EPS")
# Forecasts in USD
guidance <- subset(guidance, curr == "USD")
# Point and range forecasts
guidance_range <- subset(guidance, range_desc == 1)
guidance_range$eps_f_range <- 1
guidance_point <- subset(guidance, range_desc == 2)
guidance_point$eps_f_range <- 0
rm(guidance)
# Define EPS forecasts (midpoint for range forecasts)
guidance_range$eps_f <- (guidance_range$val_1 + guidance_range$val_2) / 2
guidance_point$eps_f <- guidance_point$val_1
guidance <- rbind(guidance_point, guidance_range)
rm(guidance_range, guidance_point)
# Rename Guidance Date
guidance <- guidance %>%   rename(guidance_date = anndats)

#### Load IBES Detail History - Actuals file (accessed via WRDS) 
actuals <- read.csv("ibes_actuals.csv")
# Ensure that variables are consistently in lower case
names(actuals) <- tolower(names(actuals))
# Apply same filters: EPS, annual, USD
actuals <- subset(actuals, measure == "EPS")
actuals <- subset(actuals, pdicity == "ANN")
actuals <- subset(actuals, curr_act == "USD")

# Rename Earnings Announcement Date and Realized EPS
actuals <- actuals %>% rename(ea_date = anndats, eps_r = value)

# Get date variables for match
actuals$prd_yr <- substr(actuals$pends, 1, 4)
actuals$prd_mon <- substr(actuals$pends, 6, 7)
actuals$prd_mon <- str_remove(actuals$prd_mon, "^0+")
actuals$fyear_end <- as.Date(actuals$pends)
actuals <- actuals %>% select(ticker, oftic, cusip, fyear_end, eps_r, ea_date, prd_yr, prd_mon)
  
# Merge IBES actuals and guidance file // Require match
guidance <- merge(guidance, actuals, by = c("ticker","prd_yr", "prd_mon"))
rm(actuals)

# IBES CUSIP corresponds to CRSPs NCUSIP - adopt CRSP's terminology from here on
guidance <- guidance %>% rename(ncusip = cusip)

# For subsequent merges based on monthly data: define the month before the guidance date
guidance$guidance_date <- as.Date(guidance$guidance_date)  
guidance$previous_month <- guidance$guidance_date %m-% months(1)
guidance$month <- format(guidance$previous_month, "%Y%m")
guidance$previous_month <- NULL

### Import University of Michigan Consumer Sentiment monthly data (obtained via https://www.sca.isr.umich.edu/) 
sentiment <- read.csv("u_of_m_index.csv")

# Create month variable for subsequent merge
month_lookup <- c("January" = "01", "February" = "02", "March" = "03", "April" = "04",
                  "May" = "05", "June" = "06", "July" = "07", "August" = "08",
                  "September" = "09", "October" = "10", "November" = "11", "December" = "12")
sentiment$numeric_month <- month_lookup[sentiment$Month]
rm(month_lookup)
sentiment$month <- paste0(sentiment$YYYY, sentiment$numeric_month)
sentiment <- sentiment %>% select(month, ICS_ALL)
sentiment <- sentiment %>% rename(sentiment = ICS_ALL)

guidance <- merge(guidance, sentiment, by = "month")
rm(sentiment)

### Import pre-processed stock market data
stocks <- read.csv("data_for_further_analyses.csv", header=FALSE)
stocks <- stocks %>% 
  rename(permno = V1,
    month = V2,
    date = V3,
    size = V4,
    stock_issues = V5,
    beta = V6,
    ivol = V7,
    sue = V8,
    sue_date = V9,
    short = V10,
    turnover = V11,
    bm = V12,
    op = V13,
    ep = V14,
    disp = V15,
    analysts = V16,
    vola = V17,
    ret1m = V18,
    ret2m = V19,
    ret3m = V20,
    ret1yr = V21,
    ret2yr = V22,
    ret3yr = V23,
    ret4yr = V24,
    ret5yr = V25,
    cgo = V26,
    percgain = V27,
    cgo_placebo = V28,
    cgo_daily = V29
  )

# Calculate cross-sectional ranks in full stock data 
stocks <- stocks %>% group_by(month) %>% mutate(cgo_rank = ntile(cgo, 100))
stocks <- stocks %>% group_by(month) %>% mutate(percgain_rank = ntile(percgain, 100))

# Load monthly NCUSIP PERMNO merge-data based obtained via CRSP // Require match 
linking_table <- read.csv("ncsuip-permno-cusip-merge.csv")
names(linking_table) <- tolower(names(linking_table))
stocks <- merge(stocks, linking_table, by = c("permno","date"))
rm(linking_table)

# Merge guidance and stock-based observations at month-end before guidance date
guidance <- merge(guidance,stocks, by = c("ncusip","month")) 
rm(stocks)

# Write all guidance dates to a txt file for WRDS Event Study
guidance_red <- guidance %>% select("permno","guidance_date")
guidance_red <- guidance_red %>% distinct(permno,guidance_date, .keep_all = TRUE)
#write.table(guidance_red, "guidance_ann_permno.txt", sep = " ", row.names = FALSE, col.names = FALSE, quote = FALSE)
rm(guidance_red)

# Import guidance CARs and match with dataset //[-1;+1] CARs relative to FF3 with a 31-day gap based on 255 return obs (at least 30)
guidance_car <- read.csv("guidance_car_permno.csv")
guidance_car <- guidance_car %>% select(permno, evtdate, car)
guidance_car <- guidance_car %>% rename(guidance_date = evtdate, guidance_car = car)
guidance <- merge(guidance, guidance_car, by = c("permno","guidance_date"))
rm(guidance_car)

# Write all earnings announcement dates to a txt file for WRDS Event Study
guidance_red <- guidance %>% select("permno","ea_date")
guidance_red <- guidance_red %>% distinct(permno,ea_date, .keep_all = TRUE)
#write.table(guidance_red, "Event Study/ea_ann_permno.txt", sep = " ", row.names = FALSE, col.names = FALSE, quote = FALSE)
rm(guidance_red)

# Import EA CARs and match with dataset //[-1;+1] CARs relative to FF3 with a 31-day gap based on 255 return obs (at least 30)
ea_car <- read.csv("ea_car_permno.csv")
ea_car <- ea_car %>% select(permno, evtdate, car)
ea_car <- ea_car %>% rename(ea_date = evtdate, ea_car = car)
guidance <- merge(guidance, ea_car, by = c("permno","ea_date"))
rm(ea_car)

### Import CRSP/Compustat Merged Fundamentals Annual file (accessed via WRDS) for prior fyear-end stock price
compustat <- read.csv("crsp_compustat.csv")
names(compustat) <- tolower(names(compustat))
compustat <- compustat %>% arrange(gvkey, datadate) %>% mutate(price = prcc_f / adjex_f) %>%  group_by(gvkey) %>%mutate(price_prev = lag(price)) %>% ungroup()
compustat$prd_yr <- substr(compustat$datadate, 1, 4)
compustat$prd_mon <- substr(compustat$datadate, 6, 7)
compustat$prd_mon <- str_remove(compustat$prd_mon, "^0+")
compustat <- compustat %>% rename(permno = lpermno, cusip_9 = cusip)
compustat$cusip <- substr(compustat$cusip_9, 1,8)
compustat <- compustat %>% select(permno, gvkey, prd_yr, prd_mon, price_prev, sic)
guidance <- merge(guidance, compustat, by = c("permno","prd_yr","prd_mon"))
rm(compustat)

### Apply final filters
# Limit to guidance within 365 days of firm-year end
guidance$horizon <- as.numeric(guidance$fyear_end - guidance$guidance_date)
guidance <- subset(guidance, horizon >= 1)
guidance <- subset(guidance, horizon < 365)

# Exclude utilities and regulated industries (SIC 4900 - 4949)
guidance <- subset(guidance, sic < 4900 | sic > 4949)
# Exclude financial services (SIC 6000 - 6999)
guidance <- subset(guidance, sic < 6000 | sic > 6999)

# First forecast by year 
guidance <- guidance %>%
  group_by(permno, fyear_end) %>%
  slice_min(guidance_date, with_ties = TRUE) %>%
  ungroup()

# Tie-breaking rule
guidance <- guidance %>%
  arrange(permno, fyear_end, mod_date, mod_time) %>%
  group_by(permno, fyear_end) %>%
  slice_tail(n = 1) 

# Date restriction
guidance <- subset(guidance, year(guidance_date) >= 2001 & year(guidance_date) <= 2023)

# Drop small stocks
guidance <- subset(guidance, price_prev >= 10)

### Final variable definitions
# Scale up dependent variables
guidance$bias <- ((guidance$eps_f - guidance$eps_r) / guidance$price_prev) * 100
guidance$ea_car <- guidance$ea_car * 100
guidance$guidance_car <- guidance$guidance_car * 100

# Log of size and horizon 
guidance$size <- log(1000 * guidance$size)
guidance$horizon <- log(guidance$horizon)

# Loss dummy
guidance$loss_d <- ifelse(guidance$ep < 0, 1,0)

# SIC-based process risk
guidance$process <- ifelse(guidance$sic >= 2833 & guidance$sic <= 2836 | guidance$sic >= 8731 & guidance$sic <= 8734 | guidance$sic >= 3570 & guidance$sic <= 3577 | guidance$sic >= 7371 & guidance$sic <= 7379 | guidance$sic >= 3600 & guidance$sic <= 3674 | guidance$sic >= 5200 & guidance$sic <= 5961, 1, 0)

# Dummy variables:
# Industry
guidance$sic_two <- factor(substr(guidance$sic, 1, 2))
# Year
guidance$year <- factor(guidance$prd_yr)
# Firm
guidance$permno <- factor(guidance$permno)
# Net equity issuer
guidance$stock_issues_d <- ifelse(guidance$stock_issues > 0, 1, 0)

# Lagged guidance bias
setDT(guidance)
guidance[, prev_bias := {
  b_lag <- shift(bias)
  y_lag <- shift(prd_yr)
  fifelse(prd_yr == y_lag + 1L & !is.na(b_lag), b_lag, 0)
}, by = permno]

# Drop if missing CGO, Guidance Bias, Guidance CAR or EA CAR
guidance <- subset(guidance, !is.na(bias) & !is.na(cgo) & !is.na(guidance_car) & !is.na(ea_car))

# Cumulate return variables
guidance$ret3m_c <- (1+guidance$ret1m) * (1+guidance$ret2m) * (1+guidance$ret3m)-1
guidance$ret3yr_c <- (1+guidance$ret1yr) * (1+guidance$ret2yr) * (1+guidance$ret3yr)-1
guidance$ret5yr_c <- (1+guidance$ret1yr) * (1+guidance$ret2yr) * (1+guidance$ret3yr)* (1+guidance$ret4yr)* (1+guidance$ret5yr)-1

guidance$cgo_non_w <- guidance$cgo

### Winsorization
winsor_vars <- c("bias","guidance_car","ea_car","cgo","beta","size","bm","horizon","prev_bias","op","analysts","disp","ivol","stock_issues","sentiment","turnover","short","vola","cgo_placebo","percgain","ret1yr","ret1m","ret3m_c","ret3yr_c","ret5yr_c","cgo_daily")
guidance <- guidance %>%
  mutate(across(
    all_of(winsor_vars),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))

### Dummies for interaction tests
median_dummy_vars <- c("cgo","turnover","beta","short","vola","ivol","disp","horizon")
guidance <- guidance %>%
  mutate(across(
    .cols  = all_of(median_dummy_vars),
    .fns   = ~ as.integer(.x > median(.x, na.rm = TRUE)),
    .names = "{.col}_d"
  ))

### Define different sets of controls
controls_1 <- c("beta","size","bm","sic_two","year")
controls_2 <- c(controls_1, "horizon","loss_d","process","prev_bias")
controls_3 <- c(controls_2, "op","analysts","disp","vola","stock_issues_d","sentiment")
controls_4 <- c("beta","size","bm","horizon","loss_d","prev_bias","op","analysts","disp","vola","stock_issues_d","sentiment","permno","year")

controls <- list(
  controls_1 = controls_1,
  controls_2 = controls_2,
  controls_3 = controls_3,
  controls_4 = controls_4
)

#write.table(guidance, "actoeg_data.txt", sep = ",", row.names = FALSE, col.names = TRUE, quote = FALSE)

############################# Data Analysis - Main Paper #############################
############################# Table 1 #############################
guidance_sumstat <- guidance %>% select(bias, guidance_car, ea_car, cgo, beta, size,bm, horizon, loss_d, process, prev_bias, op, analysts, disp, vola, stock_issues_d, sentiment, turnover, short)

datasummary(
  All(guidance_sumstat) ~
    Mean + SD + P25 + Median + P75 + N,
  data = guidance_sumstat,
  fmt = "%.4f",
  title = "Summary Statistics",
  label = "tab:sumstats",
  output = "latex_tabular"
)

############################# Table 2 #############################
c1 <- lm(bias ~ cgo + sic_two + year, data = guidance)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)

modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

############################# Table 3 #############################
to <- lm(data = guidance, update(bias ~ cgo * turnover_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_to <- sqrt(diag(vcovCL(to, cluster = ~ permno + year)))

rsi <- lm(data = guidance, update(bias ~ cgo * short_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_rsi <- sqrt(diag(vcovCL(rsi, cluster = ~ permno + year)))

beta <- lm(data = guidance, update(bias ~ cgo * beta_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_beta <- sqrt(diag(vcovCL(beta, cluster = ~ permno + year)))

vola <- lm(data = guidance, update(bias ~ cgo * vola_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_vola <- sqrt(diag(vcovCL(vola, cluster = ~ permno + year)))

disp <- lm(data = guidance, update(bias ~ cgo * disp_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_disp <- sqrt(diag(vcovCL(disp, cluster = ~ permno + year)))

hori <- lm(data = guidance, update(bias ~ cgo * horizon_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_hori <- sqrt(diag(vcovCL(hori, cluster = ~ permno + year)))

modelsummary(list(to, rsi, beta, vola, disp, hori), vcov = function(x) vcovCL(x, cluster = ~ permno + year), coef_map = c("cgo", "cgo:turnover_d", "cgo:short_d", "cgo:beta_d", "cgo:vola_d", "cgo:disp_d", "cgo:horizon_d", "turnover_d", "short_d", "beta_d", "vola_d", "disp_d", "horizon_d"), estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*" = 0.10, "**" = 0.05, "***" = 0.01), fmt = 4, output = "latex", title = "Guidance Bias", gof_omit = "AIC|BIC|Log.Lik|F|RMSE", escape = FALSE, add_rows = tibble::tibble(term = c("Firm Controls", "Year Fixed Effects", "Industry Fixed Effects", "Adjusted R$^2$"), `(1)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(to)$adj.r.squared)), `(2)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(rsi)$adj.r.squared)), `(3)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(beta)$adj.r.squared)), `(4)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(vola)$adj.r.squared)), `(5)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(disp)$adj.r.squared)), `(6)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(hori)$adj.r.squared))))
rm(to, rsi, vola,  disp, hori, se_to, se_rsi, se_vola, se_disp, se_hori)

############################# Table 4 #############################
c1 <- lm(data = guidance, update(bias ~ cgo + ret1m, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(bias ~ cgo + ret3m_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(bias ~ cgo + ret1yr, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance, update(bias ~ cgo + ret3yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(bias ~ cgo + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

c6 <- lm(data = guidance, update(bias ~ cgo + ret1m + ret3m_c + ret1yr + ret3yr_c + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_6 <- sqrt(diag(vcovCL(c6, cluster = ~ permno + year)))

c7 <- lm(data = guidance, update(bias ~ cgo + cgo_placebo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_7 <- sqrt(diag(vcovCL(c7, cluster = ~ permno + year)))

modelsummary(list(c1, c2, c3, c4, c5, c6, c7), vcov = function(x) vcovCL(x, cluster = ~ permno + year), coef_map = c("cgo", "ret1m", "ret3m_c", "ret1yr", "ret3yr_c", "ret5yr_c", "cgo_placebo"), estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*" = 0.10, "**" = 0.05, "***" = 0.01), fmt = 2, output = "latex", title = "Guidance Bias and CGO", gof_omit = "AIC|BIC|Log.Lik|F|RMSE|R2|Adj", escape = FALSE)
rm(c1, c2, c3, c4, c5, c6, se_1, se_2, se_3, se_4, se_5, se_6)

############################# Table 5 (also the data for Figure 3) #############################
# Create quintiles based on cross-sectional rank
PF <- guidance[order(guidance$cgo_rank),]
high_cgo <- PF[PF$cgo_rank > 80,]
PF_4 <- PF[PF$cgo_rank <= 80 & PF$cgo_rank > 60,]
PF_3 <- PF[PF$cgo_rank <= 60 & PF$cgo_rank > 40,]
PF_2 <- PF[PF$cgo_rank <= 40 & PF$cgo_rank > 20,]
low_cgo <- PF[PF$cgo_rank <= 20,]

# Guidance Bias# 
mean(low_cgo$bias)
mean(PF_2$bias)
mean(PF_3$bias)
mean(PF_4$bias)
mean(high_cgo$bias)

# Guidance CAR # 
mean(low_cgo$guidance_car)
mean(PF_2$guidance_car)
mean(PF_3$guidance_car)
mean(PF_4$guidance_car)
mean(high_cgo$guidance_car)

# Earnings Announcement CAR #
mean(low_cgo$ea_car)
mean(PF_2$ea_car)
mean(PF_3$ea_car)
mean(PF_4$ea_car)
mean(high_cgo$ea_car)

# Test differences between low and high-CGO #
PF_long_short <- PF[PF$cgo_rank > 80 | PF$cgo_rank <= 20,]
PF_long_short$dummy <- ifelse(PF_long_short$cgo_rank > 80, 1, 0)

bias <- lm(bias ~ dummy, data = PF_long_short)
se_bias <- sqrt(diag(vcovCL(bias, cluster = ~ permno + year)))

car <- lm(guidance_car ~ dummy, data = PF_long_short)
se_car <- sqrt(diag(vcovCL(car, cluster = ~ permno + year)))

car_ea <- lm(ea_car ~ dummy, data = PF_long_short)
se_ea <- sqrt(diag(vcovCL(car_ea, cluster = ~ permno + year)))

modelsummary(list(bias, car, car_ea), vcov = function(x) vcovCL(x, cluster = ~ permno + year), estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*" = 0.10, "**" = 0.05, "***" = 0.01), fmt = 4, output = "latex", gof_omit = "AIC|BIC|Log.Lik|F|RMSE|R2|Adj", escape = FALSE)
rm(bias, car, car_ea,se_bias, se_car, se_ea )
rm(low_cgo, high_cgo, PF_2, PF_3, PF_4)

############################# Table 6 #############################
c1 <- lm(guidance_car ~ cgo + sic_two + year, data = guidance)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(guidance_car ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(guidance_car ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(ea_car ~ cgo + sic_two + year, data = guidance)
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(ea_car ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

c6 <- lm(data = guidance, update(ea_car ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_6 <- sqrt(diag(vcovCL(c6, cluster = ~ permno + year)))

modelsummary(list("(1)" = c1, "(2)" = c2, "(3)" = c3, "(4)" = c4, "(5)" = c5, "(6)" = c6), vcov = function(x) vcovCL(x, cluster = ~ permno + year), estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*" = 0.10, "**" = 0.05, "***" = 0.01), coef_omit = "year|sic_two", gof_omit = "AIC|BIC|Log.Lik|RMSE", fmt = 4, output = "latex", title = "Guidance Bias and CAR", escape = FALSE)
rm(c1, c2, c3, c4, c5, c6, se_1, se_2, se_3, se_4, se_5, se_6)

############################# Data Analysis - Online Appendix #############################
############################# Table A1 #############################
c1 <- lm(bias ~ percgain + sic_two + year, data = guidance)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(bias ~ percgain, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(bias ~ percgain, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance, update(bias ~ percgain, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(bias ~ percgain, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

modelsummary(list(c1, c2, c3, c4, c5), vcov = function(x) vcovCL(x, cluster = ~ permno + year), estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*" = 0.10, "**" = 0.05, "***" = 0.01), coef_omit = "year|sic_two|permno", gof_omit = "AIC|BIC|Log.Lik|F|RMSE|R2|Adj", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

############################# Table A2 #############################
to <- lm(data = guidance, update(bias ~ percgain * turnover_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_to <- sqrt(diag(vcovCL(to, cluster = ~ permno + year)))

rsi <- lm(data = guidance, update(bias ~ percgain * short_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_rsi <- sqrt(diag(vcovCL(rsi, cluster = ~ permno + year)))

beta <- lm(data = guidance, update(bias ~ percgain * beta_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_beta <- sqrt(diag(vcovCL(beta, cluster = ~ permno + year)))

vola <- lm(data = guidance, update(bias ~ percgain * vola_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_vola <- sqrt(diag(vcovCL(vola, cluster = ~ permno + year)))

disp <- lm(data = guidance, update(bias ~ percgain * disp_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_disp <- sqrt(diag(vcovCL(disp, cluster = ~ permno + year)))

hori <- lm(data = guidance, update(bias ~ percgain * horizon_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_hori <- sqrt(diag(vcovCL(hori, cluster = ~ permno + year)))

modelsummary(list(to, rsi, beta, vola, disp, hori), vcov = function(x) vcovCL(x, cluster = ~ permno + year), coef_map = c("percgain","percgain:turnover_d","percgain:short_d","percgain:beta_d","percgain:vola_d","percgain:disp_d","percgain:horizon_d","turnover_d","short_d","beta_d","vola_d","disp_d","horizon_d"), estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*" = 0.10, "**" = 0.05, "***" = 0.01), fmt = 2, output = "latex", title = "Guidance Bias", gof_omit = "AIC|BIC|Log.Lik|F|RMSE|R2|Adj", escape = FALSE)
rm(to, rsi, beta, vola, disp, hori, se_to, se_rsi, se_beta, se_vola , se_disp, se_hori)

############################# Table A3 #############################
c1 <- lm(data = guidance, update(bias ~ percgain + ret1m, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(bias ~ percgain + ret3m_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(bias ~ percgain + ret1yr, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance, update(bias ~ percgain + ret3yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(bias ~ percgain + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

c6 <- lm(data = guidance, update(bias ~ percgain + ret1m + ret3m_c + ret1yr + ret3yr_c + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_6 <- sqrt(diag(vcovCL(c6, cluster = ~ permno + year)))

c7 <- lm(data = guidance, update(bias ~ percgain + cgo_placebo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_7 <- sqrt(diag(vcovCL(c7, cluster = ~ permno + year)))

modelsummary(list("(1)"=c1,"(2)"=c2,"(3)"=c3,"(4)"=c4,"(5)"=c5,"(6)"=c6,"(7)"=c7), vcov = ~ permno + year, coef_keep = "percgain|ret1m|ret3m_c|ret1yr|ret3yr_c|ret5yr_c|cgo_placebo", estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*"=0.10,"**"=0.05,"***"=0.01), gof_omit = "AIC|BIC|Log.Lik|RMSE|R2", fmt = 2, output = "latex", title = "Guidance Bias and CGO", escape = FALSE)
rm(c1, c2, c3, c4, c5, c6, c7, se_1, se_2, se_3, se_4, se_5, se_6, se_7)

############################# Table A4 #############################
PF <- guidance[order(guidance$percgain_rank),]
high_percgain <- PF[PF$percgain_rank > 80,]
PF_4 <- PF[PF$percgain_rank <= 80 & PF$percgain_rank > 60,]
PF_3 <- PF[PF$percgain_rank <= 60 & PF$percgain_rank > 40,]
PF_2 <- PF[PF$percgain_rank <= 40 & PF$percgain_rank > 20,]
low_percgain <- PF[PF$percgain_rank <= 20,]

# Guidance Bias# 
mean(low_percgain$bias)
mean(PF_2$bias)
mean(PF_3$bias)
mean(PF_4$bias)
mean(high_percgain$bias)

# Guidance CAR # 
mean(low_percgain$guidance_car)
mean(PF_2$guidance_car)
mean(PF_3$guidance_car)
mean(PF_4$guidance_car)
mean(high_percgain$guidance_car)

# Earnings Announcement CAR #
mean(low_percgain$ea_car)
mean(PF_2$ea_car)
mean(PF_3$ea_car)
mean(PF_4$ea_car)
mean(high_percgain$ea_car)

# Test differences between low and high-CGO #
PF_long_short <- PF[PF$percgain_rank > 80 | PF$percgain_rank <= 20,]
PF_long_short$dummy <- ifelse(PF_long_short$percgain_rank > 80, 1, 0)

bias <- lm(bias ~ dummy, data = PF_long_short)
se_bias <- sqrt(diag(vcovCL(bias, cluster = ~ permno + year)))

car <- lm(guidance_car ~ dummy, data = PF_long_short)
se_car <- sqrt(diag(vcovCL(car, cluster = ~ permno + year)))

car_ea <- lm(ea_car ~ dummy, data = PF_long_short)
se_ea <- sqrt(diag(vcovCL(car_ea, cluster = ~ permno + year)))

modelsummary(list(bias, car, car_ea), vcov = function(x) vcovCL(x, cluster = ~ permno + year), estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*" = 0.10, "**" = 0.05, "***" = 0.01), fmt = 4, output = "latex", gof_omit = "AIC|BIC|Log.Lik|F|RMSE|R2|Adj", escape = FALSE)
rm(bias, car, car_ea,se_bias, se_car, se_ea )
rm(low_percgain, high_percgain, PF_2, PF_3, PF_4)

############################# Table A5 #############################
c1 <- lm(guidance_car ~ percgain + sic_two + year, data = guidance)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(guidance_car ~ percgain, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(guidance_car ~ percgain, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(ea_car ~ percgain + sic_two + year, data = guidance)
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(ea_car ~ percgain, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

c6 <- lm(data = guidance, update(ea_car ~ percgain, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_6 <- sqrt(diag(vcovCL(c6, cluster = ~ permno + year)))

modelsummary(list("(1)"=c1,"(2)"=c2,"(3)"=c3,"(4)"=c4,"(5)"=c5,"(6)"=c6), vcov = vcov_fun, coef_omit = "year|sic_two", estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*"=0.10,"**"=0.05,"***"=0.01), gof_omit = "AIC|BIC|Log.Lik|RMSE|R2", fmt = 4, output = "latex", title = "Guidance Bias and CAR", escape = FALSE)
rm(c1, c2, c3, c4, c5, c6, se_1, se_2, se_3, se_4, se_5, se_6)

############################# Table A8 #############################
# Load quarterly IBES Detail History - Actuals file (accessed via WRDS)
ibes_actuals_qtr <- read.csv("ibes_actuals_qtr.csv")
names(ibes_actuals_qtr) <- tolower(names(ibes_actuals_qtr))
ibes_actuals_qtr <- subset(ibes_actuals_qtr, cusip != "")
ibes_actuals_qtr <- ibes_actuals_qtr %>% rename(ncusip = cusip)
# Add permno firm identifier
linking_table <- read.csv("ncsuip-permno-cusip-merge.csv")
names(linking_table) <- tolower(names(linking_table))
linking_table$pends <- ymd(linking_table$date)
ibes_actuals_qtr$pends <- ymd(ibes_actuals_qtr$pends) 
ibes_actuals_qtr <- merge(ibes_actuals_qtr, linking_table, by = c("ncusip","pends"))
rm(linking_table)
#ibes_actuals_qtr <- ibes_actuals_qtr[, c("permno", "anndats")]
#write.table(ibes_actuals_qtr, file = "OA Data/qrt_ea_events_permno.txt", sep = " ", row.names = FALSE, col.names = FALSE, quote = FALSE)
#rm(ibes_actuals_qtr)
# Load market reactions around intermediate earnings announcements // obtained via WRDS Event Study
qrt_ea_car <- read.csv("ea_qrt_car_permno.csv")
names(qrt_ea_car) <- tolower(names(qrt_ea_car))
qrt_ea_car <- qrt_ea_car %>% select(permno, evtdate, car)
qrt_ea_car <- qrt_ea_car %>% rename(qrt_ea_date = evtdate, qrt_ea_car = car)
qrt_ea_car$qrt_ea_date <- ymd(qrt_ea_car$qrt_ea_date)
qrt_ea_car$qrt_ea_car <- qrt_ea_car$qrt_ea_car * 100
qrt_ea_car$permno <- as.factor(qrt_ea_car$permno)
# Keep earnings announcements after the guidance date and before the associated final earnings announcements
guidance_eaqrt <- guidance %>%
  inner_join(qrt_ea_car, by = "permno") %>%
  filter(qrt_ea_date > guidance_date & qrt_ea_date < ea_date)
rm(qrt_ea_car)
# Identify annual EPS guidance at intermediate earnings announcements
guid <- read.csv("ibes_guidance.csv")
guid <- subset(guid, measure == "EPS")
guid <- subset(guid, pdicity == "ANN")
guid <- subset(guid, range_desc == 1 | range_desc == 2)
guid$eps_f_qrt <- ifelse(guid$range_desc == 1, (guid$val_1 + guid$val_2)/2, guid$val_1)
guid$qrt_ea_date <- ymd(guid$anndats)
guid <- guid %>% select(ticker, qrt_ea_date, eps_f_qrt, prd_yr, prd_mon)
guid <- unique(guid)
guidance_eaqrt <- merge(guidance_eaqrt, guid, by = c("ticker","qrt_ea_date","prd_yr","prd_mon"))
# Define EPS guidance bias at intermediate earnings announcements
guidance_eaqrt$bias_qrt <-  ((guidance_eaqrt$eps_f_qrt - guidance_eaqrt$eps_r) / guidance_eaqrt$price_prev) * 100
# Focus on first intermediate earnings announcement only
guidance_eaqrt_first <- guidance_eaqrt %>% group_by(permno, guidance_date, ea_date) %>% slice_min(order_by = qrt_ea_date, n = 1, with_ties = FALSE) %>% ungroup()
# Winsorize
winsor_vars <- c("bias_qrt")
guidance_eaqrt_first <- guidance_eaqrt_first %>%
  mutate(across(
    all_of(winsor_vars),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))

c1 <- lm(bias_qrt ~ cgo + sic_two + year, data = guidance_eaqrt_first)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_eaqrt_first, update(bias_qrt ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_eaqrt_first, update(bias_qrt ~ cgo, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance_eaqrt_first, update(bias_qrt ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance_eaqrt_first, update(bias_qrt ~ cgo, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

modelsummary(list("(1)"=c1,"(2)"=c2,"(3)"=c3,"(4)"=c4,"(5)"=c5), vcov = ~ permno + year, coef_omit = "year|sic_two|permno", estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*"=0.10,"**"=0.05,"***"=0.01), gof_omit = "AIC|BIC|Log.Lik|RMSE|R2", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)
rm(ibes_actuals_qtr, guidance_eaqrt, guidance_eaqrt_first)

############################# Table A10 #############################
# Create median-based sentiment dummy
guidance$sentiment_d <- ifelse(guidance$sentiment > median(guidance$sentiment, na.rm = TRUE), 1, 0)

# Exclude sentiment from controls set if we already include sentiment dummy
controls_3_exsent <- c(controls_2, "op","analysts","disp","vola","stock_issues_d")
controls_4_exsent <- c("beta","size","bm","horizon","loss_d","prev_bias","op","analysts","disp","vola","stock_issues_d","permno","year")

c1 <- lm(bias ~ cgo * sentiment_d + sic_two + year, data = guidance)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(bias ~ cgo * sentiment_d, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(bias ~ cgo * sentiment_d, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance, update(bias ~ cgo * sentiment_d, as.formula(paste(". ~ . +", paste(controls_3_exsent, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(bias ~ cgo * sentiment_d, as.formula(paste(". ~ . +", paste(controls_4_exsent, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

modelsummary(list("(1)"=c1,"(2)"=c2,"(3)"=c3,"(4)"=c4,"(5)"=c5), vcov=function(x) vcovCL(x, cluster=~permno+year), coef_omit="year|sic_two|permno", estimate="{estimate}{stars}", statistic="({std.error})", stars=c("*"=0.10,"**"=0.05,"***"=0.01), gof_omit="AIC|BIC|Log.Lik|RMSE|R2", fmt=4, output="latex", title="Guidance Bias and CGO", escape=FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

############################# Table A11 #############################
# Get annual total compensation from Compustat Executive Compensation - Annual Compensation file (accessed via WRDS)
exec <- read.csv("execucomp_anncomp.csv")
names(exec) <- tolower(names(exec))
# Focus on CEOs
exec <- subset(exec, ceoann == "CEO")
exec <-  exec %>% select(gvkey, year, becameceo, co_per_rol, joined_co, gender, shrown_tot_pct) 
exec <- exec %>% rename(prd_yr = year)
# Define new Execucomp variables
exec$female_d <- ifelse(exec$gender == "MALE", 0, 1)
exec$ceo_pct <- ifelse(is.na(exec$shrown_tot_pct),0,exec$shrown_tot_pct)
guidance_exec <- merge(guidance, exec, by = c("prd_yr","gvkey"))
rm(exec)
winsor_vars <- c("ceo_pct")
guidance_exec <- guidance_exec %>%
  mutate(across(
    all_of(winsor_vars),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))

c1 <- lm(bias ~ cgo * ceo_pct + female_d + sic_two + year, data = guidance_exec)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_exec, update(bias ~ cgo * ceo_pct + female_d, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_exec, update(bias ~ cgo * ceo_pct + female_d, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance_exec, update(bias ~ cgo * ceo_pct + female_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance_exec, update(bias ~ cgo * ceo_pct + female_d, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

modelsummary(list("(1)"=c1,"(2)"=c2,"(3)"=c3,"(4)"=c4,"(5)"=c5), vcov = function(x) vcovCL(x, cluster = ~ permno + year), coef_omit = "year|sic_two|permno", coef_map = NULL, estimate = "{estimate}{stars}", statistic = "({std.error})", stars = c("*"=0.10,"**"=0.05,"***"=0.01), gof_omit = "AIC|BIC|Log.Lik|RMSE|R2", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)
rm(guidance_exec)

############################# Table A12 #############################
# Helper function for date handling 
parse_any_date <- function(x) {
  if (inherits(x, "Date")) return(x)
  if (is.factor(x)) x <- as.character(x)
  
  if (is.character(x)) {
    x_trim <- trimws(x)
    out <- as.Date(x_trim)
    bad <- is.na(out) & nzchar(x_trim)
    if (any(bad)) {
      ymd <- suppressWarnings(as.Date(x_trim[bad], format = "%Y%m%d"))
      out[bad] <- ymd
    }
    return(out)
  }
  
  if (is.numeric(x)) {
    out <- rep(as.Date(NA), length(x))
    
    idx_ymd <- !is.na(x) & x >= 19000101 & x <= 21001231
    if (any(idx_ymd)) out[idx_ymd] <- as.Date(as.character(as.integer(x[idx_ymd])), format = "%Y%m%d")
    
    idx_stata <- !is.na(x) & x >= 0 & x < 60000
    idx_stata <- idx_stata & is.na(out)
    if (any(idx_stata)) out[idx_stata] <- as.Date("1960-01-01") + as.integer(round(x[idx_stata]))
    
    return(out)
  }
  
  as.Date(x)
}
# Get Compustat Executive Compensation - Outstanding Equity Awards file (accessed via WRDS)
exec_eqawards <- read.csv("exec_outstanding_equity_awards.csv", stringsAsFactors = FALSE)
compustat <- read.csv("crsp_compustat.csv", stringsAsFactors = FALSE)
names(compustat) <- tolower(names(compustat))
compustat <- compustat %>%
  select(gvkey, fyear, datadate) %>%
  rename(year = fyear) %>%
  mutate(year_end_date = parse_any_date(datadate)) %>%
  select(-datadate)
exec_eqawards <- exec_eqawards %>%
  inner_join(compustat, by = c("gvkey","year"))
# Define date five year prior to expiration
exec_eqawards <- exec_eqawards %>%
  mutate(
    expirationdate      = parse_any_date(exdate), 
    expirationdate_five = expirationdate - as.integer(round(365.25 * 5))
  )
# Define option moneyness
exec_eqawards <- exec_eqawards %>%
  mutate(
    expric = suppressWarnings(as.numeric(expric)),
    prccf = suppressWarnings(as.numeric(prccf)),
    opts_unex_exer = suppressWarnings(as.numeric(opts_unex_exer)),
    moneyness = ifelse(
      opts_unex_exer > 0 & !is.na(prccf) & !is.na(expric) & expric > 0,
      prccf / expric - 1,
      NA_real_
    )
  ) %>%
  filter(!is.na(moneyness) & is.finite(moneyness)) %>%
  filter(!is.na(expirationdate) & !is.na(expirationdate_five) & !is.na(year_end_date))
# Reduce dataset to relevant variables
exec_eqawards <- exec_eqawards %>%
  select(gvkey, execid, year, outawdnum, expric, expirationdate, expirationdate_five,
         opts_unex_exer, year_end_date, moneyness) %>%
  arrange(gvkey, execid, expirationdate, year)
#Flag the first observation of each executive-option package 
exec_eqawards <- exec_eqawards %>%
  group_by(execid, expirationdate) %>%
  arrange(year, .by_group = TRUE) %>%
  mutate(
    expdiff = as.integer(expirationdate - year_end_date),
    first_new = ifelse(
      row_number() == 1 |
        ( (year - 1 != dplyr::lag(year)) & (expdiff > 365) & (year <= 2024) ),
      1L, 0L
    )
  ) %>%
  ungroup() %>%
  select(-expdiff)
# Identify full exercises
exec_eqawards <- exec_eqawards %>%
  group_by(execid, expirationdate) %>%
  arrange(year, .by_group = TRUE) %>%
  mutate(exercised = dplyr::lead(first_new)) %>%
  ungroup()
# Identify partial exercises
exec_eqawards <- exec_eqawards %>%
  group_by(execid, expirationdate) %>%
  arrange(year, .by_group = TRUE) %>%
  mutate(
    partly_exercised = ifelse(
      !is.na(dplyr::lag(opts_unex_exer)) &
        (year - 1 == dplyr::lag(year)) &
        (as.integer(expirationdate - year_end_date) > 365) &
        (year <= 2024) &
        (opts_unex_exer < dplyr::lag(opts_unex_exer)),
      1L, 0L
    )
  ) %>%
  ungroup()
# Focus on deep ITM options with five years to expiration
exec_eqawards <- exec_eqawards %>%
  filter(moneyness > 0.67) %>%
  filter(expirationdate_five >= (year_end_date - 365),
         expirationdate_five <= year_end_date)
# Holder67: if a deep ITM option with 5 years to expiration is not exercised
exec_eqawards <- exec_eqawards %>%
  mutate(
    holder67 = dplyr::case_when(
      exercised == 1L | partly_exercised == 1L ~ 0L,
      exercised == 0L & partly_exercised == 0L ~ 1L,
      TRUE ~ NA_integer_
    )
  )
# Count instances of non-exercises
holder67_exec_year <- exec_eqawards %>%
  group_by(execid, year) %>%
  summarise(
    holder67_y = if (all(is.na(holder67))) NA_integer_ else max(holder67, na.rm = TRUE),
    .groups = "drop"
  )
# Summarize by CEO
holder67_exec <- holder67_exec_year %>%
  group_by(execid) %>%
  summarise(
    holder67_sum   = sum(holder67_y == 1L, na.rm = TRUE),
    first_year_h67 = ifelse(holder67_sum > 0, min(year[holder67_y == 1L], na.rm = TRUE), NA_real_),
    .groups = "drop"
  )
# Get full Execucomp panel for CEO flag
exec <- read.csv("execucomp_anncomp.csv")
names(exec) <- tolower(names(exec))
# Limit sample to start in 2006 due to data availability
exec <- subset(exec, year >= 2006)
# Focus on CEOs
exec <- subset(exec, ceoann == "CEO")
exec <- exec %>% select(gvkey, execid, year)
exec <- merge(exec, holder67_exec, by = c("execid"))
# Define final overconfidence measure
exec$oc_h67 <- ifelse(exec$holder67_sum >= 2 & exec$year >= exec$first_year_h67, 1, 0)
exec <- exec %>% rename(prd_yr = year)
guidance_exec <- merge(guidance, exec, by = c("gvkey","prd_yr"))

c1 <- lm(bias ~ cgo * oc_h67 + sic_two + year, data = guidance_exec)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_exec, update(bias ~ cgo * oc_h67, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_exec, update(bias ~ cgo * oc_h67, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance_exec, update(bias ~ cgo * oc_h67, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance_exec, update(bias ~ cgo * oc_h67, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)

modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)
rm(exec_eqawards, guidance_exec, holder67_exec, holder67_exec_year)
############################# Table A13 #############################
stocks <- read.csv("data_for_further_analyses.csv", header=FALSE)
stocks <- stocks %>% 
  rename(permno = V1,
         month = V2,
         date = V3,
         size = V4,
         stock_issues = V5,
         beta = V6,
         ivol = V7,
         sue = V8,
         sue_date = V9,
         short = V10,
         turnover = V11,
         bm = V12,
         op = V13,
         ep = V14,
         disp = V15,
         analysts = V16,
         vola = V17,
         ret1m = V18,
         ret2m = V19,
         ret3m = V20,
         ret1yr = V21,
         ret2yr = V22,
         ret3yr = V23,
         ret4yr = V24,
         ret5yr = V25,
         cgo = V26,
         percgain = V27,
         cgo_placebo = V28,
         cgo_daily = V29
  )

stocks <- stocks %>% select(permno, sue, sue_date)
stocks <- unique(stocks)
stocks$guidance_date <- ymd(stocks$sue_date)
stocks$permno <- as.character(stocks$permno)
stocks <- stocks %>% rename(sue_concurrent = sue)
stocks <- stocks %>% select(permno,guidance_date,sue_concurrent)
guidance_ea <- merge(guidance, stocks, by = c("permno","guidance_date"))
guidance_no_ea <- guidance %>%  anti_join(stocks, by = c("permno", "guidance_date"))

c1 <- lm(bias ~ cgo + sic_two + year, data = guidance_no_ea)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_no_ea, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_no_ea, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance_no_ea, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance_no_ea, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

modelsummary(list("(1)"=c1,"(2)"=c2,"(3)"=c3,"(4)"=c4,"(5)"=c5), vcov=function(x) vcovCL(x, cluster=~permno+year), estimate="{estimate}{stars}", statistic="({std.error})", stars=c("*"=0.10,"**"=0.05,"***"=0.01), coef_omit="year|sic_two|permno", coef_map=NULL, gof_omit="AIC|BIC|Log.Lik|RMSE|R2", fmt=4, output="latex", title="Guidance Bias and CGO", escape=FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

############################# Table A14 #############################
c1 <- lm(bias ~ cgo + sue_concurrent + sic_two + year, data = guidance_ea)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_ea, update(bias ~ cgo + sue_concurrent, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_ea, update(bias ~ cgo + sue_concurrent, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance_ea, update(bias ~ cgo + sue_concurrent, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance_ea, update(bias ~ cgo + sue_concurrent, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vc <- partial(sandwich::vcovCL, cluster = ~ permno + year)
modelsummary(list("1"=c1,"2"=c2,"3"=c3,"4"=c4,"5"=c5), vcov = vc, coef_keep = "cgo|sue_concurrent", coef_omit = "permno|year|sic_two", estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*"=.10,"**"=.05,"***"=.01), gof_keep = "Adj.R2", gof_omit = "AIC|BIC|Log.Lik|RMSE|R2$", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)
rm(guidance_ea, guidance_no_ea, stocks)

############################# Table A15 #############################
# Load quarterly institutional holdings
insti <- read.csv("insti.csv")
# Get the next quarter
insti <- insti %>% group_by(permno) %>% mutate(qrt_lead = dplyr::lead(date, n = 1, default = NA))
insti$year <- substr(insti$qrt_lead, 1, 4)
insti$qrt <- substr(insti$qrt_lead, 6, 6)
# Fill monthly observations based on quarterly data based on last publicly available data
insti <- insti %>%
  uncount(weights = 3, .id = "seq") 
insti$month <- NA
insti$month <- ifelse(insti$qrt == 1, insti$seq, insti$month)
insti$month <- ifelse(insti$qrt == 2, insti$seq + 3, insti$month)
insti$month <- ifelse(insti$qrt == 3, insti$seq + 6, insti$month)
insti$month <- ifelse(insti$qrt == 4, insti$seq + 9, insti$month)
insti$month <- str_pad(insti$month, width = 2, pad = "0")
insti$month <- str_c(insti$year, insti$month)
insti <- insti %>% select(permno, month, insti)
insti$permno <- as.character(insti$permno)
guidance_insti <- merge(guidance, insti, by = c("permno","month"))
rm(insti)
guidance_insti$insti_d <- ifelse(guidance_insti$insti > 0.5, 1, 0)

c1 <- lm(bias ~ cgo * insti_d + sic_two + year, data = guidance_insti)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_insti, update(bias ~ cgo * insti_d, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_insti, update(bias ~ cgo * insti_d, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance_insti, update(bias ~ cgo * insti_d, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance_insti, update(bias ~ cgo * insti_d, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

modelsummary(list("(1)"=c1,"(2)"=c2,"(3)"=c3,"(4)"=c4,"(5)"=c5), vcov = function(x) vcovCL(x, cluster = ~ permno + year), coef_omit = "year|sic_two|permno", coef_map = NULL, estimate = "{estimate}{stars}", statistic = "({std.error})", stars = c("*"=0.10,"**"=0.05,"***"=0.01), gof_omit = "AIC|BIC|Log.Lik|F|RMSE|R2|SER", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)
rm(guidance_insti)

############################# Table A16 #############################
# Get Execucomp data
exec <- read.csv("execucomp_anncomp.csv")
names(exec) <- tolower(names(exec))
# Focus on CEOs only
exec <- subset(exec, ceoann == "CEO")
exec <-  exec %>% select(gvkey, year, becameceo, co_per_rol, joined_co) 
exec <- exec %>% rename(prd_yr = year)
guidance_exec <- merge(guidance, exec, by = c("prd_yr","gvkey"))
rm(exec)
# Require information on job start
guidance_exec$start_ceo <- ymd(guidance_exec$becameceo)
guidance_exec$start_firm <- ymd(guidance_exec$joined_co)
guidance_exec <- subset(guidance_exec, !is.na(start_ceo))
guidance_exec <- subset(guidance_exec, !is.na(start_firm))
guidance_exec$tenure_days_ceo <- guidance_exec$guidance_date - guidance_exec$start_ceo
guidance_exec$tenure_days_ceo <- as.numeric(guidance_exec$tenure_days)
guidance_exec$tenure_days_firm <- guidance_exec$guidance_date - guidance_exec$start_firm
guidance_exec$tenure_days_firm <- as.numeric(guidance_exec$tenure_days_firm)
guidance_exec <- subset(guidance_exec, tenure_days_ceo > 0)
# Define newly appointed CEOs
guidance_exec$ceo_new_halfyr <- ifelse(guidance_exec$tenure_days_ceo < 180, 1, 0)
guidance_exec$ceo_old_halfyr <- ifelse(guidance_exec$tenure_days_ceo >= 180, 1, 0)
guidance_exec$cgo_old <- guidance_exec$cgo * guidance_exec$ceo_old_halfyr
guidance_exec$cgo_new <- guidance_exec$cgo * guidance_exec$ceo_new_halfyr

c1 <- lm(bias ~ cgo_old + cgo_new + ceo_old_halfyr + sic_two + year, data = guidance_exec)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_exec, update(bias ~ cgo_old + cgo_new + ceo_old_halfyr, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_exec, update(bias ~ cgo_old + cgo_new + ceo_old_halfyr, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance_exec, update(bias ~ cgo_old + cgo_new + ceo_old_halfyr, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance_exec, update(bias ~ cgo_old + cgo_new + ceo_old_halfyr, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

rm(guidance_exec)

############################# Table A17 #############################
# Get Execucomp data
exec <- read.csv("execucomp_anncomp.csv")
names(exec) <- tolower(names(exec))
# Focus on CEOs
exec <- subset(exec, ceoann == "CEO")
# Combine CEO and guidance observations
exec <- exec %>% rename(prd_yr = year)
guidance_exec <- merge(guidance, exec, by = c("prd_yr","gvkey"))
# Define catering-consistent forecasts
guidance_exec$catering <- ifelse(guidance_exec$bias > 0 & guidance_exec$cgo < 0 |guidance_exec$bias < 0 & guidance_exec$cgo > 0, 1, 0)
# Summarize guidance by CEO/firm-combination
guide_exec <- guidance_exec
guidance_exec <- guidance_exec %>%
  group_by(co_per_rol) %>%               
  summarise(
    catering_ceo_mean      = mean(catering, na.rm = TRUE), 
    count_per_ceo     = n(),                            
    catering_ceo_sum  = sum(catering, na.rm = TRUE)     
  )
guidance_exec <- guidance_exec %>% select(co_per_rol, catering_ceo_mean, count_per_ceo)
exec <- merge(exec, guidance_exec, by = "co_per_rol")
# Focus on CEO/firm-year combinations with more than one forecasts in our sample
exec <- exec %>% subset(count_per_ceo > 1)
# Define CEO tenure
exec$start_year <- year(exec$becameceo)
exec$end_year <- year(exec$leftofc)
exec$tenure <- exec$end_year - exec$start_year
exec$tenure <- ifelse(exec$tenure < 0, NA, exec$tenure)
# Identify incumbent CEO at last firm-specific observation
exec <- exec %>%
  arrange(gvkey, prd_yr) %>%
  group_by(gvkey) %>%
  mutate(
    last_co_per_rol = last(co_per_rol, order_by = prd_yr)
  ) %>%
  ungroup()
exec$current_data_year <- max(exec$prd_yr, na.rm = TRUE)
exec <- exec %>%
  arrange(gvkey, prd_yr) %>%
  group_by(gvkey) %>%
  mutate(
    last_co_per_rol = last(co_per_rol),
    incumbent       = as.integer(
      co_per_rol == last_co_per_rol &
        prd_yr    == current_data_year
    )
  ) %>%
  ungroup()
# Summarize CEO outcomes by ceo-firm combination // "catering_ceo_mean" is constant, i.e., taking its mean does not change its values
exec_ceo_summary <- exec %>%
  group_by(co_per_rol) %>%
  summarize(
    catering_ceo_mean           = mean(catering_ceo_mean,           na.rm = TRUE),
    count_per_ceo               = max(count_per_ceo,           na.rm = TRUE),
    tenure                      = mean(tenure,                  na.rm = TRUE),
    incumbent                   = max(incumbent,                na.rm = TRUE),
    salary                      = mean(salary,                  na.rm = TRUE),
    tdc1                        = mean(tdc1,                    na.rm = TRUE),
    .groups = "drop"
  )

# Identify CEOs whose guidance is consistent mostly consistent with catering
exec_ceo_summary$mostly_catering <- ifelse(exec_ceo_summary$catering_ceo_mean > 0.5, 1, 0)

# Winsorize all variables
vars_to_winsor <- c("catering_ceo_mean", "tenure", "incumbent", "salary", "tdc1")

exec_ceo_summary <- exec_ceo_summary %>%
  mutate(across(
    all_of(vars_to_winsor),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))

# Define non-salary as difference of total compensation and salary
exec_ceo_summary$non_salary <- exec_ceo_summary$tdc1 - exec_ceo_summary$salary
# Create separate samples
exec_mostly <- subset(exec_ceo_summary, mostly_catering == 1)
exec_no <- subset(exec_ceo_summary, mostly_catering == 0)

# Get mean differences
mean(exec_mostly$tdc1)
mean(exec_no$tdc1)

mean(exec_mostly$salary)
mean(exec_no$salary)

mean(exec_mostly$non_salary)
mean(exec_no$non_salary)

mean(exec_mostly$tenure)
mean(exec_no$tenure)

mean(exec_mostly$incumbent)
mean(exec_no$incumbent)

# Test differences fro significance
tdc1       <- lm(tdc1       ~ mostly_catering, data = exec_ceo_summary)
salary     <- lm(salary     ~ mostly_catering, data = exec_ceo_summary)
non_salary <- lm(non_salary ~ mostly_catering, data = exec_ceo_summary)
tenure     <- lm(tenure     ~ mostly_catering, data = exec_ceo_summary)
incumbent  <- lm(incumbent  ~ mostly_catering, data = exec_ceo_summary)

modelsummary(list("TDC1"=tdc1,"Salary"=salary,"Non-salary"=non_salary,"Tenure"=tenure,"Incumbent"=incumbent), vcov = sandwich::vcovHC, statistic = "({statistic})", estimate = "{estimate}{stars}", stars = c("*"=.10,"**"=.05,"***"=.01), gof_omit = "AIC|BIC|Log.Lik|RMSE|R2|Adj", output = "latex", title = "CEO Characteristics and Mostly Catering CEOs", fmt = 4, escape = FALSE)
rm(tdc1, salary, non_salary, tenure, incumbent)
rm(exec_mostly, exec_no)

############################# Table A18 #############################
# Get Execucomp firm-year panel
exec <- read.csv("execucomp_anncomp.csv")
names(exec) <- tolower(names(exec))
# Focus on CEOs only
exec <- subset(exec, ceoann == "CEO")
exec <-  exec %>% select(gvkey, year, co_per_rol, age, gender, tdc1) 
exec <- exec %>%
  arrange(gvkey, year) %>%
  group_by(gvkey)
# Identify CEO turnover
exec <- exec %>%  mutate(ceo_turnover = ifelse(co_per_rol != dplyr:: lead(co_per_rol), 1, 0))
# Use Gentry et al. (2018, SMJ) database to identify CEO dismissals (https://zenodo.org/records/4543893)
exec_dis <- read_stata("ceo_dismissal.dta") 
exec_dis <- exec_dis %>% rename(year = fyear)
exec_dis <- exec_dis %>% select(gvkey, year, ceo_dismissal)
exec <- merge(exec, exec_dis, by = c("gvkey","year"), all.x = TRUE)
rm(exec_dis)
exec$ceo_dismissal <- ifelse(is.na(exec$ceo_dismissal) == 1, 0, exec$ceo_dismissal)
# Focus only on observations where we know whether the CEO has changed
exec <- exec %>% subset(!is.na(ceo_turnover))
guide_exec <- guide_exec %>% select(gvkey, guidance_date, catering, fyear_end, bias)
cs <- read.csv("crsp_compustat.csv")
names(cs) <- tolower(names(cs))
cs <- cs %>% select(gvkey, fyear, datadate, sic, lpermno)
cs <- cs %>% rename(year = fyear, permno = lpermno)
cs$fyear_end <- ymd(cs$datadate)
cs <- cs %>%
  arrange(gvkey, datadate) %>%              
  group_by(gvkey) %>%
  mutate(
    prev_fyear_end = dplyr::lag(datadate, 1) 
  ) %>%
  ungroup()
cs$datadate <- NULL
exec <- merge(exec, cs, by = c("gvkey","year"))
rm(cs)
exec <- merge(exec, guide_exec, by = c("gvkey","fyear_end"), all.x = TRUE)
rm(guide_exec)
exec$guidance <- ifelse(!is.na(exec$catering), 1, 0)
exec$catering_dummy <- ifelse(is.na(exec$catering),0,exec$catering)
exec$sic_two <- factor(substr(exec$sic, 1, 2))
# Measure all stock-based explanatory variables at the prior fiscal year end
exec$prev_fyear_end <- as.Date(exec$prev_fyear_end)
exec$month <- format(exec$prev_fyear_end, "%Y%m")
exec$month <- as.integer(exec$month)
# Add stock- and accounting-based explanatory variables
stocks <- read.csv("data_for_further_analyses.csv", header=FALSE)
stocks <- stocks %>% 
  rename(permno = V1,
         month = V2,
         date = V3,
         size = V4,
         stock_issues = V5,
         beta = V6,
         ivol = V7,
         sue = V8,
         sue_date = V9,
         short = V10,
         turnover = V11,
         bm = V12,
         op = V13,
         ep = V14,
         disp = V15,
         analysts = V16,
         vola = V17,
         ret1m = V18,
         ret2m = V19,
         ret3m = V20,
         ret1yr = V21,
         ret2yr = V22,
         ret3yr = V23,
         ret4yr = V24,
         ret5yr = V25,
         cgo = V26,
         percgain = V27,
         cgo_placebo = V28,
         cgo_daily = V29
  )
# Keep only relevant variables
stocks <- stocks %>% select(permno, month, cgo, beta, size, bm, op, analysts, disp, vola, stock_issues)
exec <- merge(exec, stocks, by = c("permno","month"))
rm(stocks)
# Add U of M sentiment index
sentiment <- read.csv("u_of_m_index.csv")
month_lookup <- c("January" = "01", "February" = "02", "March" = "03", "April" = "04",
                  "May" = "05", "June" = "06", "July" = "07", "August" = "08",
                  "September" = "09", "October" = "10", "November" = "11", "December" = "12")
sentiment$numeric_month <- month_lookup[sentiment$Month]
rm(month_lookup)
sentiment$month <- paste0(sentiment$YYYY, sentiment$numeric_month)
sentiment <- sentiment %>% select(month, ICS_ALL)
sentiment <- sentiment %>% rename(sentiment = ICS_ALL)
exec <- merge(exec, sentiment, by = "month")
rm(sentiment)
exec$stock_issues_d <- ifelse(exec$stock_issues > 0, 1, 0)
# Winsorize continuous explanatory variables
vars_to_winsor <- c("cgo", "size", "beta", "bm", "op", "analysts", "disp", "vola","sentiment")
exec <- exec %>%
  mutate(across(
    all_of(vars_to_winsor),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))
exec$year <- as.factor(exec$year)
exec$permno <- as.factor(exec$permno)
exec <- merge(exec, guidance_exec, by = "co_per_rol", all.x = TRUE)
exec$mostly_catering <- ifelse(exec$catering_ceo_mean > 0.5, 1, 0)
exec$female <- ifelse(exec$gender == "FEMALE", 1, 0)
controls_exec <- c("beta","size","bm","op","sentiment","vola","age","female")
# Require two forecasts per CEO-firm combination
exec_2 <- subset(exec, count_per_ceo > 1)
# For dismissal: only until 2018 due to data availability
exec_3 <- subset(exec_2, year(fyear_end) < 2019)

c1 <- lm(ceo_turnover ~ mostly_catering + cgo + sic_two + year, data = exec_2)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = exec_2, update(ceo_turnover ~ mostly_catering + cgo + sic_two + year, as.formula(paste(". ~ . +", paste(controls_exec, collapse = " + ")))))
coeftest(c2, vcov = vcovHC, type = "HC1")
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(ceo_dismissal ~ mostly_catering + cgo + sic_two + year, data = exec_3)
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = exec_3, update(ceo_dismissal ~ mostly_catering + cgo + sic_two + year, as.formula(paste(". ~ . +", paste(controls_exec, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(tdc1 ~ mostly_catering + cgo + sic_two + year, data = exec_2)
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

c6 <- lm(data = exec_2, update(tdc1 ~ mostly_catering + cgo + sic_two + year, as.formula(paste(". ~ . +", paste(controls_exec, collapse = " + ")))))
se_6 <- sqrt(diag(vcovCL(c6, cluster = ~ permno + year)))

modelsummary(list("(1)"=c1,"(2)"=c2,"(3)"=c3,"(4)"=c4,"(5)"=c5,"(6)"=c6), vcov = function(x) sandwich::vcovCL(x, cluster = ~ permno + year), coef_omit = "year|sic_two|permno", estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*"=.10,"**"=.05,"***"=.01), gof_omit = "AIC|BIC|Log.Lik|RMSE|R2", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape = FALSE)
rm(c1, c2, c3, c4, c5, c6, se_1, se_2, se_3, se_4, se_5, se_6)
rm(exec, exec_2, exec_3, exec_ceo_summary, guidance_exec)

############################# Table A19 #############################
### Load Dechow et al. Annual Misstatement data (https://sites.google.com/usc.edu/aaerdataset/misstatement-type) // 1982 - 2021
misstatement_data <- read_excel("misstatement_data.xlsx", sheet = "ann")
names(misstatement_data) <- tolower(names(misstatement_data))
misstatement_data <- misstatement_data %>% select(gvkey, yeara, date)
misstatement_data <- misstatement_data %>% rename(year = yeara, fyear_end = date)
misstatement_data$fyear_end <- ymd(misstatement_data$fyear_end)
misstatement_data$misstatement_d <- 1

### Load CRSP/Compustat data
compustat <- read.csv("crsp_compustat.csv")
names(compustat) <- tolower(names(compustat))
compustat$fyear_end <- ymd(compustat$datadate)
compustat <- compustat %>% rename(permno = lpermno)
compustat <- merge(compustat, misstatement_data, by = c("gvkey","fyear_end"), all.x = TRUE)
rm(misstatement_data)
compustat$misstatement_d <- ifelse(is.na(compustat$misstatement_d),0, compustat$misstatement_d)
# Lag FYEAR_END for subsequent merges by one period
compustat <- compustat %>%
  mutate(fyear_end = as.Date(fyear_end)) %>%             
  arrange(gvkey, fyear_end) %>%
  group_by(gvkey) %>%
  mutate(prev_fyear_end = dplyr::lag(fyear_end, n = 1)) %>%
  ungroup()
compustat <- compustat %>%
  mutate(month = as.integer(format(prev_fyear_end, "%Y%m")))

### Import pre-processed stock market data
stocks <- read.csv("data_for_further_analyses.csv", header=FALSE)
stocks <- stocks %>% 
  rename(permno = V1,
         month = V2,
         date = V3,
         size = V4,
         stock_issues = V5,
         beta = V6,
         ivol = V7,
         sue = V8,
         sue_date = V9,
         short = V10,
         turnover = V11,
         bm = V12,
         op = V13,
         ep = V14,
         disp = V15,
         analysts = V16,
         vola = V17,
         ret1m = V18,
         ret2m = V19,
         ret3m = V20,
         ret1yr = V21,
         ret2yr = V22,
         ret3yr = V23,
         ret4yr = V24,
         ret5yr = V25,
         cgo = V26,
         percgain = V27,
         cgo_placebo = V28,
         cgo_daily = V29
  )

compustat <- merge(compustat, stocks, by = c("permno","month"))
rm(stocks)

### Import University of Michigan Consumer Sentiment data
sentiment <- read.csv("u_of_m_index.csv")

# Create month variable for subsequent merge
month_lookup <- c("January" = "01", "February" = "02", "March" = "03", "April" = "04",
                  "May" = "05", "June" = "06", "July" = "07", "August" = "08",
                  "September" = "09", "October" = "10", "November" = "11", "December" = "12")
sentiment$numeric_month <- month_lookup[sentiment$Month]
rm(month_lookup)
sentiment$month <- paste0(sentiment$YYYY, sentiment$numeric_month)
sentiment <- sentiment %>% select(month, ICS_ALL)
sentiment <- sentiment %>% rename(sentiment = ICS_ALL)

compustat <- merge(compustat, sentiment, by = "month")
rm(sentiment)

# Date restriction (data availability)
compustat <- subset(compustat, year(fyear_end) >= 2001 & year(fyear_end) <= 2021)

# Loss Dummy
compustat$loss_d <- ifelse(compustat$ep < 0, 1,0)

# Define process risk based on SIC
compustat$process <- ifelse(compustat$sic >= 2833 & compustat$sic <= 2836 | compustat$sic >= 8731 & compustat$sic <= 8734 | compustat$sic >= 3570 & compustat$sic <= 3577 | compustat$sic >= 7371 & compustat$sic <= 7379 | compustat$sic >= 3600 & compustat$sic <= 3674 | compustat$sic >= 5200 & compustat$sic <= 5961, 1, 0)
# Industry dummies based on two-digit SIC code
compustat$sic_two <- factor(substr(compustat$sic, 1, 2))

# Year dummies
compustat$year <- factor(compustat$fyear)

# Firm dummies
compustat$permno <- factor(compustat$permno)

# Net Equity Issuer
compustat$stock_issues_d <- ifelse(compustat$stock_issues > 0, 1, 0)

# Require availability of CGO
compustat <- subset(compustat, !is.na(cgo))

# Require being part of our main dataset
actoeg_data <- guidance
actoeg_data <- actoeg_data %>% select(permno,fyear_end,bias)
actoeg_data$guidance_d <- 1
actoeg_data$permno <- as.factor(actoeg_data$permno)
compustat <- merge(compustat, actoeg_data, by = c("permno","fyear_end"))
# Identify forecasts consistent with catering
compustat$consistent_catering <- ifelse(compustat$bias > 0 & compustat$cgo < 0 | compustat$bias < 0 & compustat$cgo > 0, 1, 0)

### Winsorization - winsorize all continuous variables at 1% level
winsor_vars <- c("cgo","beta","size","bm","op","analysts","disp","vola","stock_issues","sentiment","turnover","short")
compustat <- compustat %>%
  mutate(across(
    all_of(winsor_vars),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))

### Define new sets of controls for following analysis
controls_1 <- c("beta","size","bm","sic_two","year")
controls_2 <- c(controls_1,"loss_d","process")
controls_3 <- c(controls_2, "op","analysts","disp","vola","stock_issues_d","sentiment")
controls_4 <- c("beta","size","bm","loss_d","op","analysts","disp","vola","stock_issues_d","sentiment","permno","year")

c1 <- lm(misstatement_d ~ cgo * consistent_catering + sic_two + year, data = compustat)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = compustat, update(misstatement_d ~  cgo * consistent_catering, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = compustat, update(misstatement_d ~ cgo * consistent_catering, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = compustat, update(misstatement_d ~ cgo * consistent_catering, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = compustat, update(misstatement_d ~ cgo * consistent_catering, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)
rm(compustat, actoeg_date)

### Reset sets of controls
controls_1 <- c("beta","size","bm","sic_two","year")
controls_2 <- c(controls_1, "horizon","loss_d","process","prev_bias")
controls_3 <- c(controls_2, "op","analysts","disp","vola","stock_issues_d","sentiment")
controls_4 <- c("beta","size","bm","horizon","loss_d","prev_bias","op","analysts","disp","vola","stock_issues_d","sentiment","permno","year")

############################# Table A20 #############################
# Create quintiles based on in-sample rank
PF <- guidance  %>% mutate(cgo_rank_insample = ntile(cgo, 100))

high_cgo <- PF[PF$cgo_rank_insample > 80,]
PF_4 <- PF[PF$cgo_rank_insample <= 80 & PF$cgo_rank_insample > 60,]
PF_3 <- PF[PF$cgo_rank_insample <= 60 & PF$cgo_rank_insample > 40,]
PF_2 <- PF[PF$cgo_rank_insample <= 40 & PF$cgo_rank_insample > 20,]
low_cgo <- PF[PF$cgo_rank_insample <= 20,]

# Guidance Bias# 
mean(low_cgo$bias)
mean(PF_2$bias)
mean(PF_3$bias)
mean(PF_4$bias)
mean(high_cgo$bias)

# Guidance CAR # 
mean(low_cgo$guidance_car)
mean(PF_2$guidance_car)
mean(PF_3$guidance_car)
mean(PF_4$guidance_car)
mean(high_cgo$guidance_car)

# Earnings Announcement CAR #
mean(low_cgo$ea_car)
mean(PF_2$ea_car)
mean(PF_3$ea_car)
mean(PF_4$ea_car)
mean(high_cgo$ea_car)

# Test differences between low and high-CGO #
PF_long_short <- PF[PF$cgo_rank_insample > 80 | PF$cgo_rank_insample <= 20,]
PF_long_short$dummy <- ifelse(PF_long_short$cgo_rank_insample > 80, 1, 0)

bias <- lm(bias ~ dummy, data = PF_long_short)
se_bias <- sqrt(diag(vcovCL(bias, cluster = ~ permno + year)))

car <- lm(guidance_car ~ dummy, data = PF_long_short)
se_car <- sqrt(diag(vcovCL(car, cluster = ~ permno + year)))

car_ea <- lm(ea_car ~ dummy, data = PF_long_short)
se_ea <- sqrt(diag(vcovCL(car_ea, cluster = ~ permno + year)))

modelsummary(list(bias, car, car_ea), vcov = function(x) vcovCL(x, cluster = ~ permno + year), estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*" = 0.10, "**" = 0.05, "***" = 0.01), fmt = 4, output = "latex", gof_omit = "AIC|BIC|Log.Lik|F|RMSE|R2|Adj", escape = FALSE)
rm(bias, car, car_ea,se_bias, se_car, se_ea )
rm(low_cgo, high_cgo, PF_2, PF_3, PF_4)

############################# Table A21 #############################
c1 <- ivreg(data = guidance, guidance_car ~ bias + beta + size + bm + horizon + loss_d + process + prev_bias + op + analysts + disp + vola + stock_issues_d + sentiment + sic_two + year | cgo + beta + size + bm + horizon + loss_d + process + prev_bias + op + analysts + disp + vola + stock_issues_d + sentiment + sic_two + year)
lmtest::coeftest(c1,vcov. = clubSandwich::vcovCR(c1,cluster = with(guidance, interaction(permno, year))[as.integer(rownames(stats::model.matrix(c1)))],type = "CR2"))
c2 <- ivreg(data = guidance, ea_car ~ bias + beta + size + bm + horizon + loss_d + process + prev_bias + op + analysts + disp + vola + stock_issues_d + sentiment + sic_two + year | cgo + beta + size + bm + horizon + loss_d + process + prev_bias + op + analysts + disp + vola + stock_issues_d + sentiment + sic_two + year)
lmtest::coeftest(c2,vcov. = clubSandwich::vcovCR(c1,cluster = with(guidance, interaction(permno, year))[as.integer(rownames(stats::model.matrix(c1)))],type = "CR2"))
rm(c1, c2)

############################# Table A22 #############################
# Load quarterly earnings announcement dates 
ibes_actuals_qtr <- read.csv("ibes_actuals_qtr.csv")
names(ibes_actuals_qtr) <- tolower(names(ibes_actuals_qtr))
ibes_actuals_qtr <- subset(ibes_actuals_qtr, cusip != "")
ibes_actuals_qtr <- ibes_actuals_qtr %>% rename(ncusip = cusip)
# Add permno firm identifier
linking_table <- read.csv("ncsuip-permno-cusip-merge.csv")
names(linking_table) <- tolower(names(linking_table))
linking_table$pends <- ymd(linking_table$date)
ibes_actuals_qtr$pends <- ymd(ibes_actuals_qtr$pends) 
ibes_actuals_qtr <- merge(ibes_actuals_qtr, linking_table, by = c("ncusip","pends"))
rm(linking_table)
# Load market reactions around intermediate earnings announcements // obtained via WRDS Event Study
qrt_ea_car <- read.csv("ea_qrt_car_permno.csv")
names(qrt_ea_car) <- tolower(names(qrt_ea_car))
qrt_ea_car <- qrt_ea_car %>% select(permno, evtdate, car)
qrt_ea_car <- qrt_ea_car %>% rename(qrt_ea_date = evtdate, qrt_ea_car = car)
qrt_ea_car$qrt_ea_date <- ymd(qrt_ea_car$qrt_ea_date)
qrt_ea_car$qrt_ea_car <- qrt_ea_car$qrt_ea_car * 100
qrt_ea_car$permno <- as.factor(qrt_ea_car$permno)
# Keep earnings announcements after the guidance date and before the associated final earnings announcements
guidance_eaqrt <- guidance %>%
  inner_join(qrt_ea_car, by = "permno") %>%
  filter(qrt_ea_date > guidance_date & qrt_ea_date < ea_date)
rm(qrt_ea_car)
# Identify annual EPS guidance at intermediate earnings announcements
guid <- read.csv("ibes_guidance.csv")
guid <- subset(guid, measure == "EPS")
guid <- subset(guid, pdicity == "ANN")
guid <- subset(guid, range_desc == 1 | range_desc == 2)
guid$eps_f_qrt <- ifelse(guid$range_desc == 1, (guid$val_1 + guid$val_2)/2, guid$val_1)
guid$qrt_ea_date <- ymd(guid$anndats)
guid <- guid %>% select(ticker, qrt_ea_date, eps_f_qrt, prd_yr, prd_mon)
guid <- unique(guid)
guidance_eaqrt <- merge(guidance_eaqrt, guid, by = c("ticker","qrt_ea_date","prd_yr","prd_mon"))
# Separate first, second and third intermediate earnings announcement
guidance_eaqrt_first <- guidance_eaqrt %>% group_by(permno, guidance_date, ea_date) %>% slice_min(order_by = qrt_ea_date, n = 1, with_ties = FALSE) %>% ungroup()
guidance_eaqrt_second <- guidance_eaqrt %>% group_by(permno, guidance_date, ea_date) %>% arrange(qrt_ea_date, .by_group = TRUE) %>% slice(2) %>% ungroup()
guidance_eaqrt_third <- guidance_eaqrt %>% group_by(permno, guidance_date, ea_date) %>% arrange(qrt_ea_date, .by_group = TRUE) %>% slice(3) %>% ungroup()
# Winsorize EA CAR separately by sample
winsor_vars <- c("qrt_ea_car")
guidance_eaqrt_first <- guidance_eaqrt_first %>%
  mutate(across(
    all_of(winsor_vars),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))
winsor_vars <- c("qrt_ea_car")
guidance_eaqrt_second <- guidance_eaqrt_second %>%
  mutate(across(
    all_of(winsor_vars),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))
guidance_eaqrt_third <- guidance_eaqrt_third %>%
  mutate(across(
    all_of(winsor_vars),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))

c1 <- lm(data = guidance_eaqrt_first, update(qrt_ea_car ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_eaqrt_second, update(qrt_ea_car ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_eaqrt_third, update(qrt_ea_car ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3)

modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, se_1, se_2, se_3)

############################# Table A23 #############################
# Load actual annual EA dates 
actuals <- read.csv("ibes_actuals.csv")
names(actuals) <- tolower(names(actuals))
actuals <- subset(actuals, measure == "EPS")
actuals <- subset(actuals, pdicity == "ANN")
actuals <- subset(actuals, curr_act == "USD")
actuals$ea_date <- ymd(actuals$anndats)
actuals$fyear_end <- ymd(actuals$pends)
actuals <- actuals %>% rename(eps_r = value)
# Add linking table
linking_table <- read.csv("ncsuip-permno-cusip-merge.csv")
names(linking_table) <- tolower(names(linking_table))
linking_table$fyear_end <- ymd(linking_table$date)
actuals <- actuals %>% rename(ncusip = cusip)
actuals <- merge(actuals, linking_table, by = c("ncusip","fyear_end"))
rm(linking_table)
# Generate input for WRDS event study tool
actuals <- subset(actuals, ncusip != "")
actuals <- actuals %>% select(permno, fyear_end, ea_date)
actuals <- subset(actuals, permno != "")
#all_ea_dates <- actuals[, c("permno", "ea_date")]
#write.table(all_ea_dates, file = "all_ea_dates_permno.txt", sep = " ", row.names = FALSE, col.names = FALSE, quote = FALSE)
#rm(all_ea_dates)
# Load EA CARs generated by WRDS event study tool
all_ea_car <- read.csv("all_ea_car_permno.csv")
all_ea_car <- all_ea_car %>% select(permno, evtdate, car)
all_ea_car <- all_ea_car %>% rename(
  ea_date = evtdate,
  ea_car = car
)
actuals <- merge(actuals, all_ea_car, by = c("permno","ea_date"))
rm(all_ea_car)
# Define universe of matchable firms via Compustat annual data
cs <- read.csv("crsp_compustat.csv")
names(cs) <- tolower(names(cs))
cs$cusip <- substr(cs$cusip, 1, 8)
cs$sic_two <- factor(substr(cs$sic, 1, 2))
cs$fyear_end <- ymd(cs$datadate)
# Define variables for subsequent match
cs$mcap <- cs$prcc_f * cs$csho
cs$size <- log(cs$mcap)
cs$bm <- (cs$seq + cs$txditc - cs$pstk) / cs$mcap
cs <- cs %>% rename(permno = lpermno)
# Lag variables for match // defined at prior fyear-end
cs <- cs %>%
  arrange(permno, fyear) %>%
  group_by(permno) %>%
  mutate(
    mcap_lag  = dplyr::lag(mcap),
    size_lag  = dplyr::lag(size),
    bm_lag    = dplyr::lag(bm)
  ) %>%
  ungroup()
cs <- cs %>% select(fyear_end, cusip,sic_two,sic, mcap_lag, size_lag, bm_lag,fyear,permno)
ea_all <- merge(actuals, cs, by = c("permno","fyear_end"))
rm(actuals)
# EA CAR in %
ea_all$ea_car <- ea_all$ea_car * 100
guidance_reduced <- guidance %>% select(permno, fyear_end, ea_date, ea_car,month)
guidance_reduced$ea_date <- ymd(guidance_reduced$ea_date)
# Mark guiding firms
guidance_reduced$guidance <- 1
cs$permno <- as.factor(cs$permno)
guidance_reduced <- merge(guidance_reduced, cs, by = c("permno","fyear_end"))
rm(cs)
ea_all$permno <- as.factor(ea_all$permno)
# Ensure that our dataset for the subsequent match does not contain any of our guiding observations
ea_all <- anti_join(ea_all, guidance_reduced, by = c("permno","fyear_end"))
ea_all$guidance <- 0
ea_all$month <- NA
ea_all <- rbind(guidance_reduced, ea_all)
rm(guidance_reduced)
# Require availability of matching variables
ea_all <- ea_all %>% filter(!is.na(size_lag))
ea_all <- ea_all %>% filter(!is.na(bm_lag))
m.out <- matchit(
  as.factor(guidance) ~ size_lag + bm_lag,
  data   = ea_all,
  method = "nearest",
  exact  = ~ fyear + sic_two
)
matched_sample <- match.data(m.out)
rm(m.out, ea_all)
matched_sample <- matched_sample %>% rename(match_id = subclass)
# For subsequent match of monthly explanatory variables: define non-guiding firm's month as guiding firm's month
matched_sample <- matched_sample %>%
  arrange(match_id, desc(guidance)) %>% 
  group_by(match_id) %>%                
  mutate(month = ifelse(guidance == 0, first(month[guidance == 1], na.rm = TRUE), month)) %>%  
  ungroup() 
# Load monthly explanatory variables
stocks <- read.csv("data_for_further_analyses.csv", header=FALSE)
stocks <- stocks %>% 
  rename(permno = V1,
         month = V2,
         date = V3,
         size = V4,
         stock_issues = V5,
         beta = V6,
         ivol = V7,
         sue = V8,
         sue_date = V9,
         short = V10,
         turnover = V11,
         bm = V12,
         op = V13,
         ep = V14,
         disp = V15,
         analysts = V16,
         vola = V17,
         ret1m = V18,
         ret2m = V19,
         ret3m = V20,
         ret1yr = V21,
         ret2yr = V22,
         ret3yr = V23,
         ret4yr = V24,
         ret5yr = V25,
         cgo = V26,
         percgain = V27,
         cgo_placebo = V28,
         cgo_daily = V29
  )
matched_sample <- merge(matched_sample, stocks, by = c("permno","month"))
rm(stocks)
# Define additional control variables
matched_sample$loss_d <- ifelse(matched_sample$ep < 0, 1,0)
matched_sample$process <- ifelse(matched_sample$sic >= 2833 & matched_sample$sic <= 2836 | matched_sample$sic >= 8731 & matched_sample$sic <= 8734 | matched_sample$sic >= 3570 & matched_sample$sic <= 3577 | matched_sample$sic >= 7371 & matched_sample$sic <= 7379 | matched_sample$sic >= 3600 & matched_sample$sic <= 3674 | matched_sample$sic >= 5200 & matched_sample$sic <= 5961, 1, 0)
matched_sample$stock_issues_d <- ifelse(matched_sample$stock_issues > 0, 1, 0)
# Load sentiment data
sentiment <- read.csv("u_of_m_index.csv")
month_lookup <- c("January" = "01", "February" = "02", "March" = "03", "April" = "04",
                  "May" = "05", "June" = "06", "July" = "07", "August" = "08",
                  "September" = "09", "October" = "10", "November" = "11", "December" = "12")
sentiment$numeric_month <- month_lookup[sentiment$Month]
rm(month_lookup)
sentiment$month <- paste0(sentiment$YYYY, sentiment$numeric_month)
sentiment <- sentiment %>% select(month, ICS_ALL)
sentiment <- sentiment %>% rename(sentiment = ICS_ALL)
matched_sample <- merge(matched_sample, sentiment, by = "month")
rm(sentiment)
matched_sample$year <- factor(matched_sample$fyear)
# Require availability of all explanatory variables
matched_sample <- subset(matched_sample, !is.na(cgo) & !is.na(ea_car) & !is.na(beta) & !is.na(size) & !is.na(bm) & !is.na(loss_d) & !is.na(process) & !is.na(op) & !is.na(analysts) & !is.na(disp) & !is.na(vola) & !is.na(stock_issues_d) & !is.na(sentiment))
# Require availability of a match
matched_sample <- matched_sample %>%
  group_by(match_id) %>%
  filter(n() == 2) %>%
  ungroup()
# Winsorize all continuous variables
vars_to_winsor <- c(
  "ea_car", "cgo", "beta",
  "size", "bm", "op", "analysts", "disp", "vola", "sentiment"
)
matched_sample <- matched_sample %>%
  mutate(across(
    all_of(vars_to_winsor),
    ~ {
      qs <- quantile(.x, probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE)
      DescTools::Winsorize(.x, val = qs)
    }
  ))
# Define different sets of controls
controls_1m <- c("beta","size","bm")
controls_2m <- c(controls_1m,"loss_d","process")
controls_3m <- c(controls_2m, "op","analysts","disp","vola","stock_issues_d","sentiment")
# Separate guiding and non-guiding matched firms
matched_sample_no <- subset(matched_sample, guidance == 0)
matched_sample_yes <- subset(matched_sample, guidance == 1)

c1 <- lm(data = matched_sample_yes, ea_car ~ cgo + beta + size + bm)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = matched_sample_yes, update(ea_car ~ cgo, as.formula(paste(". ~ . +", paste(controls_3m, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = matched_sample_no, ea_car ~ cgo + beta + size + bm)
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = matched_sample_no, update(ea_car ~ cgo, as.formula(paste(". ~ . +", paste(controls_3m, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = matched_sample, ea_car ~ cgo * guidance + beta + size + bm)
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

c6 <- lm(data = matched_sample, update(ea_car ~ cgo  * guidance, as.formula(paste(". ~ . +", paste(controls_3m, collapse = " + ")))))
se_6 <- sqrt(diag(vcovCL(c6, cluster = ~ permno + year)))

modelsummary(list("(1)"=c1,"(2)"=c2,"(3)"=c3,"(4)"=c4,"(5)"=c5,"(6)"=c6),   vcov = list(vcovCL(c1, cluster=~permno+year),vcovCL(c2, cluster=~permno+year),vcovCL(c3, cluster=~permno+year),vcovCL(c4, cluster=~permno+year),vcovCL(c5, cluster=~permno+year),vcovCL(c6, cluster=~permno+year)), coef_omit="year|sic_two|permno", estimate="{estimate}{stars}", statistic="({statistic})", stars=c("*"=.10,"**"=.05,"***"=.01), gof_keep="Adj.R2|N", gof_omit="AIC|BIC|Log.Lik|RMSE|R2$", fmt=4, output="latex", escape=FALSE)

rm(c1, c2, c3, c4, c5, c6, se_1, se_2, se_3, se_4, se_5, se_6)
rm(matched_sample, matched_sample_no, matched_sample_yes)

############################# Table A24 #############################
c1 <- lm(bias ~ cgo + ret5yr_c + sic_two + year, data = guidance)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(bias ~ cgo + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(bias ~ cgo + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance, update(bias ~ cgo + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(bias ~ cgo + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

############################# Table A25 #############################
to <- lm(data = guidance, update(bias ~ cgo * turnover_d + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_to <- sqrt(diag(vcovCL(to, cluster = ~ permno + year)))

rsi <- lm(data = guidance, update(bias ~ cgo * short_d + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_rsi <- sqrt(diag(vcovCL(rsi, cluster = ~ permno + year)))

beta <- lm(data = guidance, update(bias ~ cgo * beta_d + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_beta <- sqrt(diag(vcovCL(beta, cluster = ~ permno + year)))

vola <- lm(data = guidance, update(bias ~ cgo * vola_d + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_vola <- sqrt(diag(vcovCL(beta, cluster = ~ permno + year)))

disp <- lm(data = guidance, update(bias ~ cgo * disp_d + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_disp <- sqrt(diag(vcovCL(disp, cluster = ~ permno + year)))

hori <- lm(data = guidance, update(bias ~ cgo * horizon_d + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_hori <- sqrt(diag(vcovCL(hori, cluster = ~ permno + year)))

modelsummary(list(to, rsi, beta, vola, disp, hori), vcov = function(x) vcovCL(x, cluster = ~ permno + year), coef_map = c("cgo", "cgo:turnover_d", "cgo:short_d", "cgo:beta_d", "cgo:vola_d", "cgo:disp_d", "cgo:horizon_d", "turnover_d", "short_d", "beta_d", "vola_d", "disp_d", "horizon_d","ret5yr_c"), estimate = "{estimate}{stars}", statistic = "({statistic})", stars = c("*" = 0.10, "**" = 0.05, "***" = 0.01), fmt = 4, output = "latex", title = "Guidance Bias", gof_omit = "AIC|BIC|Log.Lik|F|RMSE", escape = FALSE, add_rows = tibble::tibble(term = c("Firm Controls", "Year Fixed Effects", "Industry Fixed Effects", "Adjusted R$^2$"), `(1)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(to)$adj.r.squared)), `(2)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(rsi)$adj.r.squared)), `(3)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(beta)$adj.r.squared)), `(4)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(vola)$adj.r.squared)), `(5)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(disp)$adj.r.squared)), `(6)` = c("Yes", "Yes", "Yes", sprintf("%.4f", summary(hori)$adj.r.squared))))
rm(to, rsi, beta, vola, disp, hori, se_to, se_rsi, se_beta, se_vola, se_disp, se_hori)

############################# Table A26 #############################
c1 <- lm(guidance_car ~ cgo + ret5yr_c + sic_two + year, data = guidance)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(guidance_car ~ cgo + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(guidance_car ~ cgo + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(ea_car ~ cgo + ret5yr_c + sic_two + year, data = guidance)
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(ea_car ~ cgo + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

c6 <- lm(data = guidance, update(ea_car ~ cgo + ret5yr_c, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_6 <- sqrt(diag(vcovCL(c6, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5, "(6)" = c6)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, c6, se_1, se_2, se_3, se_4, se_5, se_6)

############################# Table A27 #############################
# Create alternative winsorization levels of CGO
guidance <- guidance %>%
  mutate(
    cgo_5_95  = { qs <- quantile(cgo_non_w, c(.05,.95), na.rm=TRUE, names=FALSE); DescTools::Winsorize(cgo_non_w, val=qs) },
    cgo_10_90 = { qs <- quantile(cgo_non_w, c(.10,.90), na.rm=TRUE, names=FALSE); DescTools::Winsorize(cgo_non_w, val=qs) },
    cgo_25_75 = { qs <- quantile(cgo_non_w, c(.25,.75), na.rm=TRUE, names=FALSE); DescTools::Winsorize(cgo_non_w, val=qs) }
  )

c1 <- lm(data = guidance, update(bias ~ cgo_non_w, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(bias ~ cgo_5_95, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance, update(bias ~ cgo_10_90, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(bias ~ cgo_25_75, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

############################# Table A28 #############################
guidance$abs_cgo_small <- ifelse(abs(guidance$cgo) < 0.1, 1, 0)

c1 <- lm(bias ~ cgo * abs_cgo_small + sic_two + year, data = guidance)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(bias ~ cgo * abs_cgo_small, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(bias ~ cgo * abs_cgo_small, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance, update(bias ~ cgo * abs_cgo_small, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(bias ~ cgo * abs_cgo_small, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)

############################# Table A29 #############################
# Retain only observations where we have the prior year in our sample
guidance_prev <- guidance %>% select(permno, prd_yr)
guidance_prev$prd_yr <- guidance_prev$prd_yr - 1
guidance_repeated <- merge(guidance, guidance_prev, by = c("permno","prd_yr"))

c1 <- lm(bias ~ cgo + sic_two + year, data = guidance_repeated)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_repeated, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_repeated, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance_repeated, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance_repeated, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)
rm(guidance_repeated, guidance_prev)

############################# Table A30 #############################
guidance_range <- subset(guidance, eps_f_range == 1)
guidance_point <- subset(guidance, eps_f_range == 0)

c1 <- lm(data = guidance_point, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance_point, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance_point, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance_range, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance_range, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

c6 <- lm(data = guidance_range, update(bias ~ cgo, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_6 <- sqrt(diag(vcovCL(c6, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5, "(6)" = c6)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, c6, se_1, se_2, se_3, se_4, se_5, se_6)
rm(guidance_range, guidance_point)

############################# Table A31 #############################
c1 <- lm(bias ~ cgo_daily + sic_two + year, data = guidance)
se_1 <- sqrt(diag(vcovCL(c1, cluster = ~ permno + year)))

c2 <- lm(data = guidance, update(bias ~ cgo_daily, as.formula(paste(". ~ . +", paste(controls_1, collapse = " + ")))))
se_2 <- sqrt(diag(vcovCL(c2, cluster = ~ permno + year)))

c3 <- lm(data = guidance, update(bias ~ cgo_daily, as.formula(paste(". ~ . +", paste(controls_2, collapse = " + ")))))
se_3 <- sqrt(diag(vcovCL(c3, cluster = ~ permno + year)))

c4 <- lm(data = guidance, update(bias ~ cgo_daily, as.formula(paste(". ~ . +", paste(controls_3, collapse = " + ")))))
se_4 <- sqrt(diag(vcovCL(c4, cluster = ~ permno + year)))

c5 <- lm(data = guidance, update(bias ~ cgo_daily, as.formula(paste(". ~ . +", paste(controls_4, collapse = " + ")))))
se_5 <- sqrt(diag(vcovCL(c5, cluster = ~ permno + year)))

vcov_fun <- function(x) vcovCL(x, cluster = ~ permno + year)
models <- list("(1)" = c1, "(2)" = c2, "(3)" = c3,"(4)" = c4,"(5)" = c5)
modelsummary(models,  vcov = vcov_fun, coef_omit  = "year|sic_two|permno", estimate   = "{estimate}{stars}", statistic  = "({statistic})", stars      = c("*" = 0.10, "**" = 0.05, "***" = 0.01), gof_omit   = "AIC|BIC|Log.Lik", fmt = 4, output = "latex", title = "Guidance Bias and CGO", escape  = FALSE)
rm(c1, c2, c3, c4, c5, se_1, se_2, se_3, se_4, se_5)


